# -*- coding: utf-8 -*-
"""Example of using Angle-base outlier detection (ABOD) for outlier detection
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys

# Temporary solution so the in-repo ``pyod`` package can be imported when pyod
# is not installed; this is unnecessary once pyod is installed.
# The real ``__file__`` is used (not the string literal "__file__", whose
# dirname is empty), so the parent directory of this script is appended
# regardless of the working directory the script is launched from.
try:
    _example_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # ``__file__`` is undefined (e.g. in an interactive session): fall back to
    # the current working directory, which is what the quoted form resolved to.
    _example_dir = os.getcwd()
sys.path.append(os.path.abspath(os.path.join(_example_dir, '..')))

from pyod.models.abod import ABOD
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize

if __name__ == "__main__":
    contamination = 0.1  # percentage of outliers
    n_train = 200  # number of training points
    n_test = 100  # number of testing points

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=1,
                      contamination=contamination,
                      random_state=42)

    # train ABOD detector
    clf_name = 'ABOD'
    clf = ABOD()

    print ("=========1==========")
    print (X_train) #训练集
    print (y_train) #标注
    print (X_test) #测试集
    print ("=========2==========")


    clf.fit(X_train) ## 使用X_train训练检测器clf


    '''
    当检测器clf被初始化且fit(X)函数被执行后，clf就会生成两个重要的属性：
    
    labels_: 数据X上的异常标签，返回值为二分类标签（0为正常点，1为异常点）
    decision_scores: 数据X上的异常打分，分数越高，则该数据点的异常程度越高

    '''

    # get the prediction labels and outlier scores of the training data
    ## 返回训练数据X_train上的异常标签和异常分值
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers) # 返回训练数据上的分类标签 (0: 正常值, 1: 异常值)
    print (y_train_pred)
    print("=========3==========")
    y_train_scores = clf.decision_scores_  # raw outlier scores # 返回训练数据上的异常值 (分值越大越异常)
    print(y_train_scores)
    print("=========4==========")

    # get the prediction on the test data
    ## 用训练好的clf来预测未知数据中的异常值
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1) # 返回未知数据上的分类标签 (0: 正常值, 1: 异常值)
    print(y_test_pred)
    print("=========5==========")
    y_test_scores = clf.decision_function(X_test)  # outlier scores #  返回未知数据上的异常值 (分值越大越异常)
    print(y_test_scores)
    print("=========6==========")

# The evaluation and visualization steps below are disabled: they sit inside a
# bare module-level string literal, which Python evaluates and discards, so
# none of this code runs.
# NOTE(review): visualize presumably expects 2-D feature data, while the data
# in this script is generated with n_features=1 -- confirm before re-enabling.
'''
    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True, save_figure=False)
'''